Case Data

# Download the CDC "new cases" chart data (JSON) and turn it into a tibble.
# The URL is spelled out with an explicit https scheme and a single slash
# after the host; stop_for_status() makes a failed request raise an error
# here instead of surfacing later as a confusing JSON-parsing failure.
cdc_cases = 
  GET("https://www.cdc.gov/coronavirus/2019-ncov/json/new-cases-chart-data.json") %>%
  stop_for_status() %>%
  # explicit encoding avoids httr guessing (and messaging about) the charset
  content("text", encoding = "UTF-8") %>%
  jsonlite::fromJSON() %>%
  as_tibble() %>%
  # drop the first column (V1) -- presumably the row labels; TODO confirm
  select(-V1)

# Flip the two-row layout of cdc_cases so that every former column becomes a
# row, then name the two resulting columns and give them usable types.
cdc_trans = cdc_cases %>%
  as.matrix() %>%
  t() %>%
  as_tibble() %>%
  rename(date = V1, new_cases = V2) %>%
  mutate(
    new_cases = as.numeric(new_cases),
    # parsed copy of the date string, kept alongside the original column
    new_date = as.Date(date, format = "%m/%d/%y")
  )

# Connected scatter plot of new cases over time, with slanted x-axis labels.
ggplot(cdc_trans, aes(x = new_date, y = new_cases)) +
  geom_point() +
  geom_line() +
  labs(x = "Date", y = "Number of New COVID-19 Cases") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

Death Data by Day and State

# Download the CDC provisional death counts from the Socrata endpoint and
# convert them to a tibble with numeric count columns.
#
# Socrata returns only the first 1000 rows unless $limit is given, which
# silently truncates the data once the table grows past that size, so an
# explicit (large) limit is requested. stop_for_status() turns an HTTP error
# into an immediate R error.
cdc_death = 
  GET("https://data.cdc.gov/resource/r8kw-7aab.json?$limit=50000") %>%
  stop_for_status() %>%
  # explicit encoding avoids httr guessing (and messaging about) the charset
  content("text", encoding = "UTF-8") %>%
  jsonlite::fromJSON() %>%
  as_tibble() %>%
  mutate(
    # keep only the first 10 characters of each timestamp string
    # (presumably the YYYY-MM-DD part -- TODO confirm against the API output)
    data_as_of = substr(data_as_of, 1, 10),
    start_week = substr(start_week, 1, 10),
    end_week = substr(end_week, 1, 10),
    # the count fields are converted from their JSON representation to numeric
    covid_deaths = as.numeric(covid_deaths),
    total_deaths = as.numeric(total_deaths),
    percent_of_expected_deaths = as.numeric(percent_of_expected_deaths),
    pneumonia_deaths = as.numeric(pneumonia_deaths),
    pneumonia_and_covid_deaths = as.numeric(pneumonia_and_covid_deaths),
    influenza_deaths = as.numeric(influenza_deaths),
    pneumonia_influenza_or_covid = as.numeric(pneumonia_influenza_or_covid)
  )

# States to show in the weekly death plot.
# Open question: can this selection be made interactively in the dashboard,
# and should the number of selectable states be capped?
# (Each state is listed once; the earlier duplicate "New Jersey" was removed.)
states_select = c("New York", "New Jersey", "California", "Maryland", "Arizona", "Washington")

# Weekly COVID-19 deaths, one coloured line per state, rendered interactively.
# Rows labelled "United States" and "New York City" are excluded.
death_plot = cdc_death %>%
#  filter(state %in% states_select) %>%
  filter(state != "United States", state != "New York City") %>%
  ggplot(aes(x = end_week, y = covid_deaths, color = state, group = state)) +
  geom_point() +
  geom_line() +
  labs(x = "Week End Date", y = "Number of COVID-19 Deaths", color = "State") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

ggplotly(death_plot)

Case and Death Data

# will need to figure out automating the date columns
# pivot dates to date variable instead of variable name
# mutate date variable into date format
# calculate lag difference to get new cases and deaths


# Read the USAFacts cumulative confirmed-case counts (one row per county, one
# column per day) and reduce them to one row per state and day, with both the
# cumulative total and the day-over-day increase.
cases = read_csv("https://usafactsstatic.blob.core.windows.net/public/data/covid-19/covid_confirmed_usafacts.csv", col_names = TRUE) %>%
  janitor::clean_names() %>%
  # Pick up every date column by pattern (x<month>_<day>_<year> after
  # clean_names) rather than a fixed first:last range, so days added to the
  # file later are not silently dropped.
  pivot_longer(matches("^x\\d+_\\d+_\\d+$"),
               names_to = "date",
               values_to = "total_cases") %>%
  # Strip the leading "x" and parse the rest directly; a fixed-width substr()
  # would cut off the year for two-digit months (e.g. "x10_22_20").
  mutate(date = as.Date(str_remove(date, "^x"), format = "%m_%d_%y")) %>%
  group_by(state, date) %>%
  summarize(total_cases = sum(total_cases)) %>%
  group_by(state) %>%
  # New cases = change in the running total. default = 0 makes the first day
  # of each state equal its total without hard-coding the first date.
  mutate(new_cases = total_cases - lag(total_cases, default = 0, order_by = date)) %>%
  ungroup()



# Read the USAFacts cumulative death counts (one row per county, one column
# per day) and reduce them to one row per state and day, with both the
# cumulative total and the day-over-day increase.
deaths = read_csv("https://usafactsstatic.blob.core.windows.net/public/data/covid-19/covid_deaths_usafacts.csv", col_names = TRUE) %>%
  janitor::clean_names() %>%
  # Pick up every date column by pattern (x<month>_<day>_<year> after
  # clean_names) rather than a fixed first:last range, so days added to the
  # file later are not silently dropped.
  pivot_longer(matches("^x\\d+_\\d+_\\d+$"),
               names_to = "date",
               values_to = "total_deaths") %>%
  # Strip the leading "x" and parse the rest directly; a fixed-width substr()
  # would cut off the year for two-digit months (e.g. "x10_22_20").
  mutate(date = as.Date(str_remove(date, "^x"), format = "%m_%d_%y")) %>%
  group_by(state, date) %>%
  summarize(total_deaths = sum(total_deaths)) %>%
  group_by(state) %>%
  # New deaths = change in the running total. default = 0 makes the first day
  # of each state equal its total without hard-coding the first date.
  mutate(new_deaths = total_deaths - lag(total_deaths, default = 0, order_by = date)) %>%
  ungroup()

# County populations from USAFacts, totalled to one population figure per state.
pop = 
  "https://usafactsstatic.blob.core.windows.net/public/data/covid-19/covid_county_population_usafacts.csv" %>%
  read_csv(col_names = TRUE) %>%
  janitor::clean_names() %>%
  group_by(state) %>%
  summarize(population = sum(population))

# Attach each state's population to the daily counts and express the daily
# increases as a rate per 100,000 residents.
cases2 = cases %>%
  left_join(pop, by = "state") %>%
  mutate(new_cases_norm = new_cases / population * 100000)

deaths2 = deaths %>%
  left_join(pop, by = "state") %>%
  mutate(new_deaths_norm = new_deaths / population * 100000)

Make Plots

# these will only show one state at a time on the dashboard-will have drop down menu

# Daily new cases (raw counts) for one state, as an interactive bar chart.
# The state is fixed to "NY" here.
cases_plot = cases2 %>%
  # filter first so the hover label is only built for the rows that are drawn
  filter(state == "NY") %>%
  mutate(text_label = str_c("Date: ", date, "\nCases: ", new_cases)) %>%
  # text_label is mapped to the `text` aesthetic so plotly can use it as the
  # hover tooltip; previously the label was computed but never displayed
  ggplot(aes(x = date, y = new_cases, text = text_label)) +
  geom_col(fill = "lightblue") +
  theme_classic() +
  labs(
    x = "Date",
    y = "No. New Cases",
    title = "New COVID-19 Cases by Day"
  )

ggplotly(cases_plot, tooltip = "text")
# Daily new cases per 100,000 residents for one state, as an interactive bar
# chart. The state is fixed to "NY" here.
cases_plot_norm = cases2 %>%
  # filter first so the hover label is only built for the rows that are drawn
  filter(state == "NY") %>%
  # rate is rounded in the label only; the plotted value keeps full precision
  mutate(text_label = str_c("Date: ", date, "\nCases per 100k: ", round(new_cases_norm, 2))) %>%
  # text_label is mapped to the `text` aesthetic so plotly can use it as the
  # hover tooltip; previously the label was computed but never displayed
  ggplot(aes(x = date, y = new_cases_norm, text = text_label)) +
  geom_col(fill = "lightblue") +
  theme_classic() +
  labs(
    x = "Date",
    y = "No. New Cases per 100,000",
    title = "New COVID-19 Cases per 100,000 by Day"
  )

ggplotly(cases_plot_norm, tooltip = "text")
# Daily new deaths (raw counts) for one state, as an interactive bar chart.
# The state is fixed to "NY" here.
# Note: this assignment reuses the name death_plot and therefore replaces the
# weekly state-comparison plot created earlier in the file.
death_plot = deaths2 %>%
  # filter first so the hover label is only built for the rows that are drawn
  filter(state == "NY") %>%
  mutate(text_label = str_c("Date: ", date, "\nDeaths: ", new_deaths)) %>%
  # text_label is mapped to the `text` aesthetic so plotly can use it as the
  # hover tooltip; previously the label was computed but never displayed
  ggplot(aes(x = date, y = new_deaths, text = text_label)) +
  geom_col(fill = "darkblue") +
  theme_classic() +
  labs(
    x = "Date",
    y = "No. New Deaths",
    title = "New COVID-19 Deaths by Day"
  )

ggplotly(death_plot, tooltip = "text")
# Daily new deaths per 100,000 residents for one state, as an interactive bar
# chart. The state is fixed to "NY" here.
death_plot_norm = deaths2 %>%
  # filter first so the hover label is only built for the rows that are drawn
  filter(state == "NY") %>%
  # rate is rounded in the label only; the plotted value keeps full precision
  mutate(text_label = str_c("Date: ", date, "\nDeaths per 100k: ", round(new_deaths_norm, 2))) %>%
  # text_label is mapped to the `text` aesthetic so plotly can use it as the
  # hover tooltip; previously the label was computed but never displayed
  ggplot(aes(x = date, y = new_deaths_norm, text = text_label)) +
  geom_col(fill = "darkblue") +
  theme_classic() +
  labs(
    x = "Date",
    y = "No. New Deaths per 100,000",
    title = "New COVID-19 Deaths per 100,000 by Day"
  )

# render the interactive version with the custom hover text
ggplotly(death_plot_norm, tooltip = "text")